#!/usr/bin/env python3
# This tool generates emoji_names.py from a CSV file passed in on the command line.
#
# The CSV files come from a Google Sheets document, because that's a
# better format for reviewing all the emoji and thinking about what
# names and aliases make the most sense; this script converts the
# easily exported CSV one can get from Google Sheets into the
# emoji_names.py format for consumption by the rest of our emoji
# tooling.  We check in emoji_names.py (not the CSV) whenever we rerun
# this tool to update the data.
import argparse
import csv
import os
import re
import textwrap
from typing import Any, Dict, List, Set

# Directory containing this script; used as the default location of both
# the input CSV and the generated output file.
EMOJI_DIR_PATH = os.path.split(os.path.abspath(__file__))[0]

# Template for one emoji that is in use: the (possibly empty) explanation
# comment, then a dict entry keyed by the emoji code.
ACTIVE_ENTRY = "\n".join(
    [
        "%(explanation)s",
        "    '%(emoji_code)s': {'canonical_name': '%(canonical_name)s', 'aliases': %(aliases)s},",
    ]
)

# Same shape as ACTIVE_ENTRY, but the dict entry is emitted commented out.
INACTIVE_ENTRY = "\n".join(
    [
        "%(explanation)s",
        "    # '%(emoji_code)s': {'canonical_name': '%(canonical_name)s', 'aliases': %(aliases)s},",
    ]
)

# Skeleton of the generated module; every entry already starts with its own
# newline, so the entries are spliced in directly after the opening brace.
FILE_TEMPLATE = "\n".join(
    [
        "from typing import Any, Dict",
        "",
        "EMOJI_NAME_MAPS: Dict[str, Dict[str, Any]] = {%(emoji_entries)s",
        "}",
        "",
    ]
)

# Every canonical name and alias accepted so far, used to detect duplicates.
emoji_names: Set[str] = set()


def load_data(data_file: str) -> List[List[str]]:
    """Read the emoji CSV and return its data rows.

    Args:
        data_file: Path to the CSV file to read.

    Returns:
        Every row of the file except the first (header) row, each as a list
        of cell strings. An empty or header-only file yields an empty list.
    """
    # newline="" lets the csv module handle embedded line breaks itself, and
    # an explicit UTF-8 encoding keeps parsing independent of the locale.
    with open(data_file, newline="", encoding="utf-8") as fp:
        rows = list(csv.reader(fp))
    return rows[1:]


def check_uniqueness(emoji_name: str) -> None:
    """Record emoji_name as seen, raising if it was already recorded.

    Raises:
        Exception: If emoji_name is already present in the module-level
            emoji_names set.
    """
    already_seen = emoji_name in emoji_names
    if not already_seen:
        emoji_names.add(emoji_name)
        return
    raise Exception(f"Duplicate emoji name: {emoji_name}")


def check_valid_emoji_name(emoji_name: str) -> None:
    """Raise unless emoji_name matches the allowed name pattern.

    A valid name is an optional leading "+" or "-" followed by one or more
    lowercase ASCII letters, digits, underscores or hyphens.

    Raises:
        Exception: If emoji_name does not match the pattern in full.
    """
    is_valid = re.fullmatch("[+-]?[a-z0-9_-]+", emoji_name) is not None
    if is_valid:
        return
    raise Exception(f"Invalid emoji name: {emoji_name}")


def check_emoji_names(canonical_name: str, aliases: List[str]) -> None:
    """Validate the canonical name and every alias of one emoji.

    Rows whose canonical name is the literal "X" are skipped entirely, so
    neither the name nor its aliases are validated or recorded. For all
    other rows each name is checked for validity and then for uniqueness,
    canonical name first; the first failing check raises.
    """
    if canonical_name != "X":
        for candidate in (canonical_name, *aliases):
            check_valid_emoji_name(candidate)
            check_uniqueness(candidate)


def prepare_explanation(explanation: str) -> str:
    """Turn free-form explanation text into indented `#` comment lines.

    Args:
        explanation: Raw explanation text; may be empty.

    Returns:
        An empty string when there is nothing to emit. Otherwise the
        stripped text wrapped to a width of 80, where every wrapped line is
        prefixed with a newline and "    # " and the lines are concatenated
        with no extra separator. Words and hyphenated terms are never split,
        so an over-long word stays on a single line.
    """
    if not explanation:
        return ""

    comment_prefix = "\n    # "
    wrapper = textwrap.TextWrapper(
        width=80,
        initial_indent=comment_prefix,
        subsequent_indent=comment_prefix,
        break_long_words=False,
        break_on_hyphens=False,
    )
    return "".join(wrapper.wrap(explanation.strip()))


def prepare_aliases(aliases: str) -> List[str]:
    """Split a comma-separated alias cell into a list of trimmed names.

    An empty cell yields an empty list. Empty segments (for example from a
    trailing comma) are kept as empty strings rather than dropped.
    """
    return list(map(str.strip, aliases.split(","))) if aliases != "" else []


def main() -> None:
    """Parse the command line and regenerate the emoji names module.

    Reads the CSV named by --input-file (default: emoji_names.csv next to
    this script), validates every canonical name and alias, and writes the
    rendered module to --output-file (default: emoji_names.py next to this
    script).

    Raises:
        Exception: If any name is invalid or duplicated.
        IndexError: If a data row has fewer than five columns.
    """
    description = (
        "This script is used for generating `emoji_names.py`. It takes the "
        "path of a CSV file containing the required data and an optional "
        "output file path."
    )
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument(
        "--input-file",
        dest="input_file_path",
        metavar="<path>",
        default=os.path.join(EMOJI_DIR_PATH, "emoji_names.csv"),
        help="Path to the csv file from which data is to be read.",
    )
    parser.add_argument(
        "--output-file",
        dest="output_file_path",
        metavar="<path>",
        default=os.path.join(EMOJI_DIR_PATH, "emoji_names.py"),
        help="Path to the output file.",
    )
    args = parser.parse_args()

    # Collect rendered entries in a list and join once at the end instead of
    # growing a string with += on every row.
    rendered_entries: List[str] = []
    for row in load_data(args.input_file_path):
        # Column 1 of the sheet is not used by this script.
        emoji_code = row[0]
        canonical_name = row[2]
        aliases = row[3]
        explanation = row[4]

        formatted_explanation = prepare_explanation(explanation)
        extracted_aliases = prepare_aliases(aliases)
        check_emoji_names(canonical_name, extracted_aliases)

        context = {
            "emoji_code": emoji_code,
            "canonical_name": canonical_name,
            "aliases": extracted_aliases,
            "explanation": formatted_explanation,
        }
        # A canonical name of "X" marks the row as inactive; it is still
        # written out, but as a commented-out entry.
        template = INACTIVE_ENTRY if canonical_name == "X" else ACTIVE_ENTRY
        rendered_entries.append(template % context)

    # Explicit UTF-8 so non-ASCII explanation text is written the same way
    # regardless of the locale's default encoding.
    with open(args.output_file_path, "w", encoding="utf-8") as fp:
        fp.write(FILE_TEMPLATE % {"emoji_entries": "".join(rendered_entries)})


# Run the generator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
